# Plotly standard imports (graph objects and the Chart Studio client)
import plotly.graph_objs as go
import chart_studio.plotly as py
# Cufflinks wrapper on plotly; provides the DataFrame .iplot() call used for charts
import cufflinks
# Data science imports
import pandas as pd
import numpy as np
# Options for pandas: show up to 30 columns when a DataFrame is displayed
pd.options.display.max_columns = 30
# Display the result of every expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
# Offline plotting setup for both cufflinks and plotly
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of embedding it - confirm
from plotly.offline import iplot, init_notebook_mode
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
# Set global cufflinks theme
# NOTE(review): world_readable=True looks like a sharing setting for hosted plots - confirm it is intended
cufflinks.set_config_file(world_readable=True, theme='pearl')
We will use the "Bitcoin Historical Data" dataset as the target dataset:
Bitcoin data at 1-minute intervals from select exchanges, Jan 2012 to Dec 2020.
# Project-local loader for the raw input dataset
# NOTE(review): the loader's data source and return type are not visible here;
# the calls below assume it returns a pandas DataFrame - confirm
from src.load_datasets import load_input_dataset
input_dataset = load_input_dataset()
# Preview the first rows
input_dataset.head()
| | Timestamp | Open | High | Low | Close | Volume_(BTC) | Volume_(Currency) | Weighted_Price |
|---|---|---|---|---|---|---|---|---|
| 0 | 1325317920 | 4.39 | 4.39 | 4.39 | 4.39 | 0.455581 | 2.0 | 4.39 |
| 1 | 1325317980 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 1325318040 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 1325318100 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 1325318160 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
We will explore the full input dataset. Some values are NaN, which sweetviz cannot interpret, so we will use Timestamp as the target feature for now.
# sweetviz is used here to build an exploratory report for the input dataset
import sweetviz as sv
# The source is passed as [dataframe, display name]; 'Timestamp' is the target feature
analyse_report = sv.analyze([input_dataset, 'Input'], target_feat="Timestamp")
# Render the report inline in the notebook
analyse_report.show_notebook()
We will take one timestamp per hour for faster interpretation.
# Keep every 60th row, starting at positional index 59 (rows 59, 119, 179, ...).
# With one row per minute this gives one sample per hour.
# NOTE(review): this relies on the rows being contiguous 1-minute steps - confirm there are no gaps
hours_dataset = input_dataset[59::60]
The timestamps need to be interpreted as dates for chart processing.
# Remove the 'Timestamp' column from hours_dataset (pop drops it in place) and keep its values
raw_timestamps = hours_dataset.pop('Timestamp')
# Interpret the values as seconds since the Unix epoch and convert them to datetimes
hours_datetime = pd.to_datetime(raw_timestamps, unit='s')
# Preview the remaining columns
hours_dataset.head()
| | Open | High | Low | Close | Volume_(BTC) | Volume_(Currency) | Weighted_Price |
|---|---|---|---|---|---|---|---|
| 59 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 119 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 179 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 239 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 299 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
Feature evolution over time
# Select the price and volume columns to chart
hours_features = hours_dataset[
    [
        'Open',
        'Close',
        'Weighted_Price',
        'Volume_(BTC)',
        'Volume_(Currency)',
    ]
]
# Use the converted datetimes as the index so the x-axis shows dates
hours_features.index = hours_datetime
# Draw each selected column in its own subplot
hours_features.iplot(subplots=True)